pwd
'C:\\Users\\VIPUL'
cd G:\ANTWALK\Python Project\Python Project Solution
pwd
File "C:\Users\VIPUL\AppData\Local\Temp/ipykernel_3768/2583361278.py", line 1 cd G:\ANTWALK\Python Project\Python Project Solution ^ SyntaxError: invalid syntax
cd G:\ANTWALK\Python Project\Python Project Solution
G:\ANTWALK\Python Project\Python Project Solution
pwd
'G:\\ANTWALK\\Python Project\\Python Project Solution'
import pandas as pd
df =pd.read_csv('G:\ANTWALK\Python Project\Python Project Solution\Given_Dataset\Customer Attrition Status.csv')
df
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| ... | ... | ... | ... |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
10019 rows × 3 columns
df.shape
(10019, 3)
df.describe
<bound method NDFrame.describe of RowNumber CustomerId Exited 0 1 15634602.0 1.0 1 2 15647311.0 0.0 2 3 15619304.0 1.0 3 4 15701354.0 0.0 4 5 15737888.0 0.0 ... ... ... ... 10014 10015 NaN NaN 10015 10016 NaN NaN 10016 10017 NaN NaN 10017 10018 NaN NaN 10018 10019 NaN NaN [10019 rows x 3 columns]>
df.head(7)
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| 5 | 6 | 15574012.0 | 1.0 |
| 6 | 7 | 15592531.0 | 0.0 |
df.tail(7)
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df.isnull().sum()
RowNumber 0 CustomerId 19 Exited 19 dtype: int64
df
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| ... | ... | ... | ... |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
10019 rows × 3 columns
df2 = df[df.isnull().any(axis=1)]
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df2 = df[df.isnull().any(axis=1)]
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df2.dropna(inplace= True)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py:311: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return func(*args, **kwargs)
df2 = df[df.isnull().any(axis=1)].copy((deep=True)
df2
File "C:\Users\VIPUL\AppData\Local\Temp/ipykernel_3768/4034753541.py", line 1 df2 = df[df.isnull().any(axis=1)].copy((deep=True) ^ SyntaxError: invalid syntax
import warnings
warnings.filterwarnings('ignore')
df.isnull().sum()
RowNumber 0 CustomerId 19 Exited 19 dtype: int64
df2 = df[df.isnull().any(axis=1)]
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df2.dropna(inplace= True)
df.isnull().sum()
RowNumber 0 CustomerId 19 Exited 19 dtype: int64
df2 = df[df.isnull().any(axis=1)]
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df2.dropna(inplace= True)
df2 = df[df.isnull().any(axis=1)]
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df3 = df[df.isnull().any(axis=1)]
df3
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df.tail(20)
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 9999 | 10000 | 15628319.0 | 0.0 |
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df2.dropna
df2
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 10000 | 10001 | NaN | NaN |
| 10001 | 10002 | NaN | NaN |
| 10002 | 10003 | NaN | NaN |
| 10003 | 10004 | NaN | NaN |
| 10004 | 10005 | NaN | NaN |
| 10005 | 10006 | NaN | NaN |
| 10006 | 10007 | NaN | NaN |
| 10007 | 10008 | NaN | NaN |
| 10008 | 10009 | NaN | NaN |
| 10009 | 10010 | NaN | NaN |
| 10010 | 10011 | NaN | NaN |
| 10011 | 10012 | NaN | NaN |
| 10012 | 10013 | NaN | NaN |
| 10013 | 10014 | NaN | NaN |
| 10014 | 10015 | NaN | NaN |
| 10015 | 10016 | NaN | NaN |
| 10016 | 10017 | NaN | NaN |
| 10017 | 10018 | NaN | NaN |
| 10018 | 10019 | NaN | NaN |
df = df.dropna(axis=0)
print(df)
RowNumber CustomerId Exited 0 1 15634602.0 1.0 1 2 15647311.0 0.0 2 3 15619304.0 1.0 3 4 15701354.0 0.0 4 5 15737888.0 0.0 ... ... ... ... 9995 9996 15606229.0 0.0 9996 9997 15569892.0 0.0 9997 9998 15584532.0 1.0 9998 9999 15682355.0 1.0 9999 10000 15628319.0 0.0 [10000 rows x 3 columns]
df.describe
<bound method NDFrame.describe of RowNumber CustomerId Exited 0 1 15634602.0 1.0 1 2 15647311.0 0.0 2 3 15619304.0 1.0 3 4 15701354.0 0.0 4 5 15737888.0 0.0 ... ... ... ... 9995 9996 15606229.0 0.0 9996 9997 15569892.0 0.0 9997 9998 15584532.0 1.0 9998 9999 15682355.0 1.0 9999 10000 15628319.0 0.0 [10000 rows x 3 columns]>
df.tail(20)
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 9980 | 9981 | 15719276.0 | 0.0 |
| 9981 | 9982 | 15672754.0 | 1.0 |
| 9982 | 9983 | 15768163.0 | 1.0 |
| 9983 | 9984 | 15656710.0 | 0.0 |
| 9984 | 9985 | 15696175.0 | 0.0 |
| 9985 | 9986 | 15586914.0 | 0.0 |
| 9986 | 9987 | 15581736.0 | 0.0 |
| 9987 | 9988 | 15588839.0 | 0.0 |
| 9988 | 9989 | 15589329.0 | 0.0 |
| 9989 | 9990 | 15605622.0 | 0.0 |
| 9990 | 9991 | 15798964.0 | 0.0 |
| 9991 | 9992 | 15769959.0 | 1.0 |
| 9992 | 9993 | 15657105.0 | 0.0 |
| 9993 | 9994 | 15569266.0 | 0.0 |
| 9994 | 9995 | 15719294.0 | 0.0 |
| 9995 | 9996 | 15606229.0 | 0.0 |
| 9996 | 9997 | 15569892.0 | 0.0 |
| 9997 | 9998 | 15584532.0 | 1.0 |
| 9998 | 9999 | 15682355.0 | 1.0 |
| 9999 | 10000 | 15628319.0 | 0.0 |
df.isnull().sum()
RowNumber 0 CustomerId 0 Exited 0 dtype: int64
df.head(6)
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| 5 | 6 | 15574012.0 | 1.0 |
df
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 |
| 9996 | 9997 | 15569892.0 | 0.0 |
| 9997 | 9998 | 15584532.0 | 1.0 |
| 9998 | 9999 | 15682355.0 | 1.0 |
| 9999 | 10000 | 15628319.0 | 0.0 |
10000 rows × 3 columns
dfdemog =pd.read_csv('G:\ANTWALK\Python Project\Python Project Solution\Given_Dataset\Customer Demographics.csv')
dfdemog
| RowNumber | CustomerId | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 10014 | 10015 | NaN | NaN | NaN | NaN | NaN |
| 10015 | 10016 | NaN | NaN | NaN | NaN | NaN |
| 10016 | 10017 | NaN | NaN | NaN | NaN | NaN |
| 10017 | 10018 | NaN | NaN | NaN | NaN | NaN |
| 10018 | 10019 | NaN | NaN | NaN | NaN | NaN |
10019 rows × 6 columns
dfdemog.tail(20)
| RowNumber | CustomerId | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|
| 9999 | 10000 | 15628319.0 | Walker | West | Female | 28.0 |
| 10000 | 10001 | NaN | NaN | NaN | NaN | NaN |
| 10001 | 10002 | NaN | NaN | NaN | NaN | NaN |
| 10002 | 10003 | NaN | NaN | NaN | NaN | NaN |
| 10003 | 10004 | NaN | NaN | NaN | NaN | NaN |
| 10004 | 10005 | NaN | NaN | NaN | NaN | NaN |
| 10005 | 10006 | NaN | NaN | NaN | NaN | NaN |
| 10006 | 10007 | NaN | NaN | NaN | NaN | NaN |
| 10007 | 10008 | NaN | NaN | NaN | NaN | NaN |
| 10008 | 10009 | NaN | NaN | NaN | NaN | NaN |
| 10009 | 10010 | NaN | NaN | NaN | NaN | NaN |
| 10010 | 10011 | NaN | NaN | NaN | NaN | NaN |
| 10011 | 10012 | NaN | NaN | NaN | NaN | NaN |
| 10012 | 10013 | NaN | NaN | NaN | NaN | NaN |
| 10013 | 10014 | NaN | NaN | NaN | NaN | NaN |
| 10014 | 10015 | NaN | NaN | NaN | NaN | NaN |
| 10015 | 10016 | NaN | NaN | NaN | NaN | NaN |
| 10016 | 10017 | NaN | NaN | NaN | NaN | NaN |
| 10017 | 10018 | NaN | NaN | NaN | NaN | NaN |
| 10018 | 10019 | NaN | NaN | NaN | NaN | NaN |
dfdemog.describe
<bound method NDFrame.describe of RowNumber CustomerId Surname Geography Gender Age 0 1 15634602.0 Hargrave West Female 42.0 1 2 15647311.0 Hill Central Female 41.0 2 3 15619304.0 Onio West Female 42.0 3 4 15701354.0 Boni West Female 39.0 4 5 15737888.0 Mitchell Central Female 43.0 ... ... ... ... ... ... ... 10014 10015 NaN NaN NaN NaN NaN 10015 10016 NaN NaN NaN NaN NaN 10016 10017 NaN NaN NaN NaN NaN 10017 10018 NaN NaN NaN NaN NaN 10018 10019 NaN NaN NaN NaN NaN [10019 rows x 6 columns]>
dfdemog.isnull().sum()
RowNumber 0 CustomerId 19 Surname 19 Geography 19 Gender 19 Age 19 dtype: int64
dfdemog.dropna
<bound method DataFrame.dropna of RowNumber CustomerId Surname Geography Gender Age 0 1 15634602.0 Hargrave West Female 42.0 1 2 15647311.0 Hill Central Female 41.0 2 3 15619304.0 Onio West Female 42.0 3 4 15701354.0 Boni West Female 39.0 4 5 15737888.0 Mitchell Central Female 43.0 ... ... ... ... ... ... ... 10014 10015 NaN NaN NaN NaN NaN 10015 10016 NaN NaN NaN NaN NaN 10016 10017 NaN NaN NaN NaN NaN 10017 10018 NaN NaN NaN NaN NaN 10018 10019 NaN NaN NaN NaN NaN [10019 rows x 6 columns]>
dfdemog.isnull().sum()
RowNumber 0 CustomerId 19 Surname 19 Geography 19 Gender 19 Age 19 dtype: int64
df3 = dfdemog.dropna(axis=0)
print(df3)
RowNumber CustomerId Surname Geography Gender Age 0 1 15634602.0 Hargrave West Female 42.0 1 2 15647311.0 Hill Central Female 41.0 2 3 15619304.0 Onio West Female 42.0 3 4 15701354.0 Boni West Female 39.0 4 5 15737888.0 Mitchell Central Female 43.0 ... ... ... ... ... ... ... 9995 9996 15606229.0 Obijiaku West Male 39.0 9996 9997 15569892.0 Johnstone West Male 35.0 9997 9998 15584532.0 Liu West Female 36.0 9998 9999 15682355.0 Sabbatini East Male 42.0 9999 10000 15628319.0 Walker West Female 28.0 [10000 rows x 6 columns]
dfdemog
| RowNumber | CustomerId | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 10014 | 10015 | NaN | NaN | NaN | NaN | NaN |
| 10015 | 10016 | NaN | NaN | NaN | NaN | NaN |
| 10016 | 10017 | NaN | NaN | NaN | NaN | NaN |
| 10017 | 10018 | NaN | NaN | NaN | NaN | NaN |
| 10018 | 10019 | NaN | NaN | NaN | NaN | NaN |
10019 rows × 6 columns
dfdemog = df3
df3 =pd.merge(df,dfdemog,on='CustomerId')
df3
| RowNumber_x | CustomerId | Exited | RowNumber_y | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 | 1 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | 0.0 | 2 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | 1.0 | 3 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | 0.0 | 4 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | 0.0 | 5 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 | 9996 | Obijiaku | West | Male | 39.0 |
| 9996 | 9997 | 15569892.0 | 0.0 | 9997 | Johnstone | West | Male | 35.0 |
| 9997 | 9998 | 15584532.0 | 1.0 | 9998 | Liu | West | Female | 36.0 |
| 9998 | 9999 | 15682355.0 | 1.0 | 9999 | Sabbatini | East | Male | 42.0 |
| 9999 | 10000 | 15628319.0 | 0.0 | 10000 | Walker | West | Female | 28.0 |
10000 rows × 8 columns
df
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 |
| 9996 | 9997 | 15569892.0 | 0.0 |
| 9997 | 9998 | 15584532.0 | 1.0 |
| 9998 | 9999 | 15682355.0 | 1.0 |
| 9999 | 10000 | 15628319.0 | 0.0 |
10000 rows × 3 columns
df3
| RowNumber_x | CustomerId | Exited | RowNumber_y | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 | 1 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | 0.0 | 2 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | 1.0 | 3 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | 0.0 | 4 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | 0.0 | 5 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 | 9996 | Obijiaku | West | Male | 39.0 |
| 9996 | 9997 | 15569892.0 | 0.0 | 9997 | Johnstone | West | Male | 35.0 |
| 9997 | 9998 | 15584532.0 | 1.0 | 9998 | Liu | West | Female | 36.0 |
| 9998 | 9999 | 15682355.0 | 1.0 | 9999 | Sabbatini | East | Male | 42.0 |
| 9999 | 10000 | 15628319.0 | 0.0 | 10000 | Walker | West | Female | 28.0 |
10000 rows × 8 columns
df3 =pd.merge(df,dfdemog,on='CustomerId',how='left')
df3
| RowNumber_x | CustomerId | Exited | RowNumber_y | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 | 1 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | 0.0 | 2 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | 1.0 | 3 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | 0.0 | 4 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | 0.0 | 5 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 | 9996 | Obijiaku | West | Male | 39.0 |
| 9996 | 9997 | 15569892.0 | 0.0 | 9997 | Johnstone | West | Male | 35.0 |
| 9997 | 9998 | 15584532.0 | 1.0 | 9998 | Liu | West | Female | 36.0 |
| 9998 | 9999 | 15682355.0 | 1.0 | 9999 | Sabbatini | East | Male | 42.0 |
| 9999 | 10000 | 15628319.0 | 0.0 | 10000 | Walker | West | Female | 28.0 |
10000 rows × 8 columns
df4= df3.drop(['RowNumber_y'],axis = 1)
df4
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 | Hargrave | West | Female | 42.0 |
| 1 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 |
| 2 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 |
| 3 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 |
| 4 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 |
| 9996 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 |
| 9997 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 |
| 9998 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 |
| 9999 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 |
10000 rows × 7 columns
dfInv1 =pd.read_csv('G:\ANTWALK\Python Project\Python Project Solution\Given_Dataset\Customer Investment Snapshot.csv')
dfInv1
| RowNumber | CustomerId | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|
| 0 | 1 | 15634602.0 | 2.0 | 619.0 | NaN | 101348.88 |
| 1 | 2 | 15647311.0 | 1.0 | 608.0 | 83807.86 | 112542.58 |
| 2 | 3 | 15619304.0 | 8.0 | 502.0 | 159660.80 | 113931.57 |
| 3 | 4 | 15701354.0 | 1.0 | 699.0 | NaN | 93826.63 |
| 4 | 5 | 15737888.0 | 2.0 | 850.0 | 125510.82 | 79084.10 |
| ... | ... | ... | ... | ... | ... | ... |
| 10014 | 10015 | NaN | NaN | NaN | NaN | NaN |
| 10015 | 10016 | NaN | NaN | NaN | NaN | NaN |
| 10016 | 10017 | NaN | NaN | NaN | NaN | NaN |
| 10017 | 10018 | NaN | NaN | NaN | NaN | NaN |
| 10018 | 10019 | NaN | NaN | NaN | NaN | NaN |
10019 rows × 6 columns
dfInv1.describe
<bound method NDFrame.describe of RowNumber CustomerId Tenure CreditScore Balance EstimatedSalary 0 1 15634602.0 2.0 619.0 NaN 101348.88 1 2 15647311.0 1.0 608.0 83807.86 112542.58 2 3 15619304.0 8.0 502.0 159660.80 113931.57 3 4 15701354.0 1.0 699.0 NaN 93826.63 4 5 15737888.0 2.0 850.0 125510.82 79084.10 ... ... ... ... ... ... ... 10014 10015 NaN NaN NaN NaN NaN 10015 10016 NaN NaN NaN NaN NaN 10016 10017 NaN NaN NaN NaN NaN 10017 10018 NaN NaN NaN NaN NaN 10018 10019 NaN NaN NaN NaN NaN [10019 rows x 6 columns]>
dfInv1.isnull().sum()
RowNumber 0 CustomerId 19 Tenure 19 CreditScore 22 Balance 3636 EstimatedSalary 19 dtype: int64
df6 = dfInv1.dropna(CustomerId = 'null')
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/1289050864.py in <module> ----> 1 df6 = dfInv1.dropna(CustomerId = 'null') C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs) 309 stacklevel=stacklevel, 310 ) --> 311 return func(*args, **kwargs) 312 313 return wrapper TypeError: dropna() got an unexpected keyword argument 'CustomerId'
df5 = dfInv.dropna(['CustomerId'],axis = 1)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/124880380.py in <module> ----> 1 df5 = dfInv.dropna(['CustomerId'],axis = 1) NameError: name 'dfInv' is not defined
df5= dfInv1["CreditScore"].fillna(method ='ffill', inplace = True)
dfInv1.isnull().sum()
RowNumber 0 CustomerId 19 Tenure 19 CreditScore 0 Balance 3636 EstimatedSalary 19 dtype: int64
df5= dfInv1["Balance"].fillna(method ='ffill', inplace = True)
df5= dfInv1["EstimatedSalary"].fillna(method ='ffill', inplace = True)
df5= dfInv1["Tenure"].fillna(method ='ffill', inplace = True)
df
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
| ... | ... | ... | ... |
| 9995 | 9996 | 15606229.0 | 0.0 |
| 9996 | 9997 | 15569892.0 | 0.0 |
| 9997 | 9998 | 15584532.0 | 1.0 |
| 9998 | 9999 | 15682355.0 | 1.0 |
| 9999 | 10000 | 15628319.0 | 0.0 |
10000 rows × 3 columns
dfInv1.describe
<bound method NDFrame.describe of RowNumber CustomerId Tenure CreditScore Balance EstimatedSalary 0 1 15634602.0 2.0 619.0 NaN 101348.88 1 2 15647311.0 1.0 608.0 83807.86 112542.58 2 3 15619304.0 8.0 502.0 159660.80 113931.57 3 4 15701354.0 1.0 699.0 159660.80 93826.63 4 5 15737888.0 2.0 850.0 125510.82 79084.10 ... ... ... ... ... ... ... 10014 10015 NaN 4.0 792.0 130142.79 38190.78 10015 10016 NaN 4.0 792.0 130142.79 38190.78 10016 10017 NaN 4.0 792.0 130142.79 38190.78 10017 10018 NaN 4.0 792.0 130142.79 38190.78 10018 10019 NaN 4.0 792.0 130142.79 38190.78 [10019 rows x 6 columns]>
dfInv1.describe
<bound method NDFrame.describe of RowNumber CustomerId Tenure CreditScore Balance EstimatedSalary 0 1 15634602.0 2.0 619.0 NaN 101348.88 1 2 15647311.0 1.0 608.0 83807.86 112542.58 2 3 15619304.0 8.0 502.0 159660.80 113931.57 3 4 15701354.0 1.0 699.0 159660.80 93826.63 4 5 15737888.0 2.0 850.0 125510.82 79084.10 ... ... ... ... ... ... ... 10014 10015 NaN 4.0 792.0 130142.79 38190.78 10015 10016 NaN 4.0 792.0 130142.79 38190.78 10016 10017 NaN 4.0 792.0 130142.79 38190.78 10017 10018 NaN 4.0 792.0 130142.79 38190.78 10018 10019 NaN 4.0 792.0 130142.79 38190.78 [10019 rows x 6 columns]>
dfInv1.isnull().sum()
RowNumber 0 CustomerId 19 Tenure 0 CreditScore 0 Balance 1 EstimatedSalary 0 dtype: int64
df6 = dfInv1.dropna(axis=0)
print(df6)
RowNumber CustomerId Tenure CreditScore Balance EstimatedSalary 1 2 15647311.0 1.0 608.0 83807.86 112542.58 2 3 15619304.0 8.0 502.0 159660.80 113931.57 3 4 15701354.0 1.0 699.0 159660.80 93826.63 4 5 15737888.0 2.0 850.0 125510.82 79084.10 5 6 15574012.0 8.0 645.0 113755.78 149756.71 ... ... ... ... ... ... ... 9995 9996 15606229.0 5.0 771.0 155060.41 96270.64 9996 9997 15569892.0 10.0 516.0 57369.61 101699.77 9997 9998 15584532.0 7.0 709.0 57369.61 42085.58 9998 9999 15682355.0 3.0 772.0 75075.31 92888.52 9999 10000 15628319.0 4.0 792.0 130142.79 38190.78 [9999 rows x 6 columns]
df6.isnull().sum()
RowNumber 0 CustomerId 0 Tenure 0 CreditScore 0 Balance 0 EstimatedSalary 0 dtype: int64
df7 =pd.merge(df4,df6,on='CustomerId',how='inner')
df7
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | RowNumber | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 | 2 | 1.0 | 608.0 | 83807.86 | 112542.58 |
| 1 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 | 3 | 8.0 | 502.0 | 159660.80 | 113931.57 |
| 2 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 | 4 | 1.0 | 699.0 | 159660.80 | 93826.63 |
| 3 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 | 5 | 2.0 | 850.0 | 125510.82 | 79084.10 |
| 4 | 6 | 15574012.0 | 1.0 | Chu | Central | Male | 44.0 | 6 | 8.0 | 645.0 | 113755.78 | 149756.71 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9994 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 | 9996 | 5.0 | 771.0 | 155060.41 | 96270.64 |
| 9995 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 | 9997 | 10.0 | 516.0 | 57369.61 | 101699.77 |
| 9996 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 | 9998 | 7.0 | 709.0 | 57369.61 | 42085.58 |
| 9997 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 | 9999 | 3.0 | 772.0 | 75075.31 | 92888.52 |
| 9998 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 | 10000 | 4.0 | 792.0 | 130142.79 | 38190.78 |
9999 rows × 12 columns
df8= df7.drop(['RowNumber'],axis = 1)
df8
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 | 1.0 | 608.0 | 83807.86 | 112542.58 |
| 1 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 | 8.0 | 502.0 | 159660.80 | 113931.57 |
| 2 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 | 1.0 | 699.0 | 159660.80 | 93826.63 |
| 3 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 | 2.0 | 850.0 | 125510.82 | 79084.10 |
| 4 | 6 | 15574012.0 | 1.0 | Chu | Central | Male | 44.0 | 8.0 | 645.0 | 113755.78 | 149756.71 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9994 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 | 5.0 | 771.0 | 155060.41 | 96270.64 |
| 9995 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 | 10.0 | 516.0 | 57369.61 | 101699.77 |
| 9996 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 | 7.0 | 709.0 | 57369.61 | 42085.58 |
| 9997 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 | 3.0 | 772.0 | 75075.31 | 92888.52 |
| 9998 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 | 4.0 | 792.0 | 130142.79 | 38190.78 |
9999 rows × 11 columns
df8.isnull().sum()
RowNumber_x 0 CustomerId 0 Exited 0 Surname 0 Geography 0 Gender 0 Age 0 Tenure 0 CreditScore 0 Balance 0 EstimatedSalary 0 dtype: int64
df8.describe()
| RowNumber_x | CustomerId | Exited | Age | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|---|---|
| count | 9999.000000 | 9.999000e+03 | 9999.00000 | 9999.000000 | 9999.000000 | 9999.000000 | 9999.000000 | 9999.000000 |
| mean | 5001.000000 | 1.569095e+07 | 0.20362 | 38.943194 | 5.015702 | 650.466147 | 119567.688658 | 100090.114004 |
| std | 2886.607005 | 7.193758e+04 | 0.40271 | 10.614212 | 2.902919 | 96.724029 | 30123.499324 | 57513.367468 |
| min | 2.000000 | 1.556570e+07 | 0.00000 | 18.000000 | 0.000000 | 305.000000 | 3768.690000 | 11.580000 |
| 25% | 2501.500000 | 1.562853e+07 | 0.00000 | 32.000000 | 3.000000 | 584.000000 | 99812.160000 | 50992.930000 |
| 50% | 5001.000000 | 1.569074e+07 | 0.00000 | 37.000000 | 5.000000 | 652.000000 | 119624.540000 | 100187.430000 |
| 75% | 7500.500000 | 1.575324e+07 | 0.00000 | 44.000000 | 7.500000 | 717.500000 | 138964.915000 | 149392.065000 |
| max | 10000.000000 | 1.581569e+07 | 1.00000 | 190.000000 | 30.000000 | 865.000000 | 250898.090000 | 199992.480000 |
df8.head()
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 | 1.0 | 608.0 | 83807.86 | 112542.58 |
| 1 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 | 8.0 | 502.0 | 159660.80 | 113931.57 |
| 2 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 | 1.0 | 699.0 | 159660.80 | 93826.63 |
| 3 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 | 2.0 | 850.0 | 125510.82 | 79084.10 |
| 4 | 6 | 15574012.0 | 1.0 | Chu | Central | Male | 44.0 | 8.0 | 645.0 | 113755.78 | 149756.71 |
df8.tail()
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9994 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 | 5.0 | 771.0 | 155060.41 | 96270.64 |
| 9995 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 | 10.0 | 516.0 | 57369.61 | 101699.77 |
| 9996 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 | 7.0 | 709.0 | 57369.61 | 42085.58 |
| 9997 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 | 3.0 | 772.0 | 75075.31 | 92888.52 |
| 9998 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 | 4.0 | 792.0 | 130142.79 | 38190.78 |
dfPort =pd.read_csv('G:\ANTWALK\Python Project\Python Project Solution\Given_Dataset\Customer Portfolio Snapshot.csv')
dfPort
| RowNumber | CustomerId | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|
| 0 | 1288 | 15565701 | 1.0 | 0.0 | 0.0 |
| 1 | 4199 | 15565706 | 1.0 | 1.0 | 1.0 |
| 2 | 7091 | 15565714 | 2.0 | 0.0 | 1.0 |
| 3 | 2021 | 15565779 | 1.0 | 1.0 | 0.0 |
| 4 | 3698 | 15565796 | 1.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 10016 | 10015 | 15815705 | NaN | NaN | NaN |
| 10017 | 10016 | 15815706 | NaN | NaN | NaN |
| 10018 | 10017 | 15815707 | NaN | NaN | NaN |
| 10019 | 10018 | 15815708 | NaN | NaN | NaN |
| 10020 | 10019 | 15815709 | NaN | NaN | NaN |
10021 rows × 5 columns
dfPort.isnull().sum()
RowNumber 0 CustomerId 0 NumOfProducts 19 HasChckng 19 IsActiveMember 19 dtype: int64
df9= dfPort["NumOfProducts"].fillna(method ='ffill', inplace = True)
dfPort.isnull().sum()
RowNumber 0 CustomerId 0 NumOfProducts 0 HasChckng 19 IsActiveMember 19 dtype: int64
df9= dfPort["HasChckng"].fillna(method ='ffill', inplace = True)
df9= dfPort["IsActiveMember"].fillna(method ='ffill', inplace = True)
df10 =pd.merge(df8,dfPort,on='CustomerId',how='inner')
df10.isnull().sum()
RowNumber_x 0 CustomerId 0 Exited 0 Surname 0 Geography 0 Gender 0 Age 0 Tenure 0 CreditScore 0 Balance 0 EstimatedSalary 0 RowNumber 0 NumOfProducts 0 HasChckng 0 IsActiveMember 0 dtype: int64
df10.describe
<bound method NDFrame.describe of RowNumber_x CustomerId Exited Surname Geography Gender Age \
0 2 15647311.0 0.0 Hill Central Female 41.0
1 3 15619304.0 1.0 Onio West Female 42.0
2 4 15701354.0 0.0 Boni West Female 39.0
3 5 15737888.0 0.0 Mitchell Central Female 43.0
4 6 15574012.0 1.0 Chu Central Male 44.0
... ... ... ... ... ... ... ...
9996 9996 15606229.0 0.0 Obijiaku West Male 39.0
9997 9997 15569892.0 0.0 Johnstone West Male 35.0
9998 9998 15584532.0 1.0 Liu West Female 36.0
9999 9999 15682355.0 1.0 Sabbatini East Male 42.0
10000 10000 15628319.0 0.0 Walker West Female 28.0
Tenure CreditScore Balance EstimatedSalary RowNumber \
0 1.0 608.0 83807.86 112542.58 2
1 8.0 502.0 159660.80 113931.57 3
2 1.0 699.0 159660.80 93826.63 4
3 2.0 850.0 125510.82 79084.10 5
4 8.0 645.0 113755.78 149756.71 6
... ... ... ... ... ...
9996 5.0 771.0 155060.41 96270.64 9996
9997 10.0 516.0 57369.61 101699.77 9997
9998 7.0 709.0 57369.61 42085.58 9998
9999 3.0 772.0 75075.31 92888.52 9999
10000 4.0 792.0 130142.79 38190.78 10000
NumOfProducts HasChckng IsActiveMember
0 1.0 0.0 1.0
1 3.0 1.0 0.0
2 2.0 0.0 0.0
3 1.0 1.0 1.0
4 2.0 1.0 0.0
... ... ... ...
9996 2.0 1.0 0.0
9997 1.0 1.0 1.0
9998 1.0 0.0 1.0
9999 2.0 1.0 0.0
10000 1.0 1.0 0.0
[10001 rows x 15 columns]>
df.tail()
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 9995 | 9996 | 15606229.0 | 0.0 |
| 9996 | 9997 | 15569892.0 | 0.0 |
| 9997 | 9998 | 15584532.0 | 1.0 |
| 9998 | 9999 | 15682355.0 | 1.0 |
| 9999 | 10000 | 15628319.0 | 0.0 |
df.head()
| RowNumber | CustomerId | Exited | |
|---|---|---|---|
| 0 | 1 | 15634602.0 | 1.0 |
| 1 | 2 | 15647311.0 | 0.0 |
| 2 | 3 | 15619304.0 | 1.0 |
| 3 | 4 | 15701354.0 | 0.0 |
| 4 | 5 | 15737888.0 | 0.0 |
df10
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | RowNumber | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 | 1.0 | 608.0 | 83807.86 | 112542.58 | 2 | 1.0 | 0.0 | 1.0 |
| 1 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 | 8.0 | 502.0 | 159660.80 | 113931.57 | 3 | 3.0 | 1.0 | 0.0 |
| 2 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 | 1.0 | 699.0 | 159660.80 | 93826.63 | 4 | 2.0 | 0.0 | 0.0 |
| 3 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 | 2.0 | 850.0 | 125510.82 | 79084.10 | 5 | 1.0 | 1.0 | 1.0 |
| 4 | 6 | 15574012.0 | 1.0 | Chu | Central | Male | 44.0 | 8.0 | 645.0 | 113755.78 | 149756.71 | 6 | 2.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9996 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 | 5.0 | 771.0 | 155060.41 | 96270.64 | 9996 | 2.0 | 1.0 | 0.0 |
| 9997 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 | 10.0 | 516.0 | 57369.61 | 101699.77 | 9997 | 1.0 | 1.0 | 1.0 |
| 9998 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 | 7.0 | 709.0 | 57369.61 | 42085.58 | 9998 | 1.0 | 0.0 | 1.0 |
| 9999 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 | 3.0 | 772.0 | 75075.31 | 92888.52 | 9999 | 2.0 | 1.0 | 0.0 |
| 10000 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 | 4.0 | 792.0 | 130142.79 | 38190.78 | 10000 | 1.0 | 1.0 | 0.0 |
10001 rows × 15 columns
df11= df10.drop(['RowNumber'],axis = 1)
df11
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311.0 | 0.0 | Hill | Central | Female | 41.0 | 1.0 | 608.0 | 83807.86 | 112542.58 | 1.0 | 0.0 | 1.0 |
| 1 | 3 | 15619304.0 | 1.0 | Onio | West | Female | 42.0 | 8.0 | 502.0 | 159660.80 | 113931.57 | 3.0 | 1.0 | 0.0 |
| 2 | 4 | 15701354.0 | 0.0 | Boni | West | Female | 39.0 | 1.0 | 699.0 | 159660.80 | 93826.63 | 2.0 | 0.0 | 0.0 |
| 3 | 5 | 15737888.0 | 0.0 | Mitchell | Central | Female | 43.0 | 2.0 | 850.0 | 125510.82 | 79084.10 | 1.0 | 1.0 | 1.0 |
| 4 | 6 | 15574012.0 | 1.0 | Chu | Central | Male | 44.0 | 8.0 | 645.0 | 113755.78 | 149756.71 | 2.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9996 | 9996 | 15606229.0 | 0.0 | Obijiaku | West | Male | 39.0 | 5.0 | 771.0 | 155060.41 | 96270.64 | 2.0 | 1.0 | 0.0 |
| 9997 | 9997 | 15569892.0 | 0.0 | Johnstone | West | Male | 35.0 | 10.0 | 516.0 | 57369.61 | 101699.77 | 1.0 | 1.0 | 1.0 |
| 9998 | 9998 | 15584532.0 | 1.0 | Liu | West | Female | 36.0 | 7.0 | 709.0 | 57369.61 | 42085.58 | 1.0 | 0.0 | 1.0 |
| 9999 | 9999 | 15682355.0 | 1.0 | Sabbatini | East | Male | 42.0 | 3.0 | 772.0 | 75075.31 | 92888.52 | 2.0 | 1.0 | 0.0 |
| 10000 | 10000 | 15628319.0 | 0.0 | Walker | West | Female | 28.0 | 4.0 | 792.0 | 130142.79 | 38190.78 | 1.0 | 1.0 | 0.0 |
10001 rows × 14 columns
df12 = df11(['CustomerId'].astype(int))
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/1581286670.py in <module> ----> 1 df12 = df11(['CustomerId'].astype(int)) AttributeError: 'list' object has no attribute 'astype'
df12
--------------------------------------------------------------------------- NameError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/2166137298.py in <module> ----> 1 df12 NameError: name 'df12' is not defined
df13 = df11.astype({"CustomerId":'int', "EstimatedSalary":'int',"Age":'int',"Tenure":'int',"CreditScore":'int',"Exited":'int'})
df14 = df13.astype({"HasChckng":'int'})
df14
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311 | 0 | Hill | Central | Female | 41 | 1 | 608 | 83807.86 | 112542 | 1.0 | 0 | 1.0 |
| 1 | 3 | 15619304 | 1 | Onio | West | Female | 42 | 8 | 502 | 159660.80 | 113931 | 3.0 | 1 | 0.0 |
| 2 | 4 | 15701354 | 0 | Boni | West | Female | 39 | 1 | 699 | 159660.80 | 93826 | 2.0 | 0 | 0.0 |
| 3 | 5 | 15737888 | 0 | Mitchell | Central | Female | 43 | 2 | 850 | 125510.82 | 79084 | 1.0 | 1 | 1.0 |
| 4 | 6 | 15574012 | 1 | Chu | Central | Male | 44 | 8 | 645 | 113755.78 | 149756 | 2.0 | 1 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9996 | 9996 | 15606229 | 0 | Obijiaku | West | Male | 39 | 5 | 771 | 155060.41 | 96270 | 2.0 | 1 | 0.0 |
| 9997 | 9997 | 15569892 | 0 | Johnstone | West | Male | 35 | 10 | 516 | 57369.61 | 101699 | 1.0 | 1 | 1.0 |
| 9998 | 9998 | 15584532 | 1 | Liu | West | Female | 36 | 7 | 709 | 57369.61 | 42085 | 1.0 | 0 | 1.0 |
| 9999 | 9999 | 15682355 | 1 | Sabbatini | East | Male | 42 | 3 | 772 | 75075.31 | 92888 | 2.0 | 1 | 0.0 |
| 10000 | 10000 | 15628319 | 0 | Walker | West | Female | 28 | 4 | 792 | 130142.79 | 38190 | 1.0 | 1 | 0.0 |
10001 rows × 14 columns
display.df13.dtypes()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/1693962844.py in <module> ----> 1 display.df13.dtypes() AttributeError: 'function' object has no attribute 'df13'
display(df14.dtypes)
RowNumber_x int64 CustomerId int32 Exited int32 Surname object Geography object Gender object Age int32 Tenure int32 CreditScore int32 Balance float64 EstimatedSalary int32 NumOfProducts float64 HasChckng int32 IsActiveMember float64 dtype: object
df15 = df14.astype({"EstimatedSalary":'float'})
df15
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311 | 0 | Hill | Central | Female | 41 | 1 | 608 | 83807.86 | 112542.0 | 1.0 | 0 | 1.0 |
| 1 | 3 | 15619304 | 1 | Onio | West | Female | 42 | 8 | 502 | 159660.80 | 113931.0 | 3.0 | 1 | 0.0 |
| 2 | 4 | 15701354 | 0 | Boni | West | Female | 39 | 1 | 699 | 159660.80 | 93826.0 | 2.0 | 0 | 0.0 |
| 3 | 5 | 15737888 | 0 | Mitchell | Central | Female | 43 | 2 | 850 | 125510.82 | 79084.0 | 1.0 | 1 | 1.0 |
| 4 | 6 | 15574012 | 1 | Chu | Central | Male | 44 | 8 | 645 | 113755.78 | 149756.0 | 2.0 | 1 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9996 | 9996 | 15606229 | 0 | Obijiaku | West | Male | 39 | 5 | 771 | 155060.41 | 96270.0 | 2.0 | 1 | 0.0 |
| 9997 | 9997 | 15569892 | 0 | Johnstone | West | Male | 35 | 10 | 516 | 57369.61 | 101699.0 | 1.0 | 1 | 1.0 |
| 9998 | 9998 | 15584532 | 1 | Liu | West | Female | 36 | 7 | 709 | 57369.61 | 42085.0 | 1.0 | 0 | 1.0 |
| 9999 | 9999 | 15682355 | 1 | Sabbatini | East | Male | 42 | 3 | 772 | 75075.31 | 92888.0 | 2.0 | 1 | 0.0 |
| 10000 | 10000 | 15628319 | 0 | Walker | West | Female | 28 | 4 | 792 | 130142.79 | 38190.0 | 1.0 | 1 | 0.0 |
10001 rows × 14 columns
display(df15.dtypes)
RowNumber_x int64 CustomerId int32 Exited int32 Surname object Geography object Gender object Age int32 Tenure int32 CreditScore int32 Balance float64 EstimatedSalary float64 NumOfProducts float64 HasChckng int32 IsActiveMember float64 dtype: object
df16 = df14.astype({"IsActiveMember":'int'})
display(df16.dtypes)
RowNumber_x int64 CustomerId int32 Exited int32 Surname object Geography object Gender object Age int32 Tenure int32 CreditScore int32 Balance float64 EstimatedSalary int32 NumOfProducts float64 HasChckng int32 IsActiveMember int32 dtype: object
df17 = df14.astype({"NumOfProducts":'int',"EstimatedSalary":'float' })
df17
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311 | 0 | Hill | Central | Female | 41 | 1 | 608 | 83807.86 | 112542.0 | 1 | 0 | 1.0 |
| 1 | 3 | 15619304 | 1 | Onio | West | Female | 42 | 8 | 502 | 159660.80 | 113931.0 | 3 | 1 | 0.0 |
| 2 | 4 | 15701354 | 0 | Boni | West | Female | 39 | 1 | 699 | 159660.80 | 93826.0 | 2 | 0 | 0.0 |
| 3 | 5 | 15737888 | 0 | Mitchell | Central | Female | 43 | 2 | 850 | 125510.82 | 79084.0 | 1 | 1 | 1.0 |
| 4 | 6 | 15574012 | 1 | Chu | Central | Male | 44 | 8 | 645 | 113755.78 | 149756.0 | 2 | 1 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9996 | 9996 | 15606229 | 0 | Obijiaku | West | Male | 39 | 5 | 771 | 155060.41 | 96270.0 | 2 | 1 | 0.0 |
| 9997 | 9997 | 15569892 | 0 | Johnstone | West | Male | 35 | 10 | 516 | 57369.61 | 101699.0 | 1 | 1 | 1.0 |
| 9998 | 9998 | 15584532 | 1 | Liu | West | Female | 36 | 7 | 709 | 57369.61 | 42085.0 | 1 | 0 | 1.0 |
| 9999 | 9999 | 15682355 | 1 | Sabbatini | East | Male | 42 | 3 | 772 | 75075.31 | 92888.0 | 2 | 1 | 0.0 |
| 10000 | 10000 | 15628319 | 0 | Walker | West | Female | 28 | 4 | 792 | 130142.79 | 38190.0 | 1 | 1 | 0.0 |
10001 rows × 14 columns
import seaborn as sns
df17.head()
| RowNumber_x | CustomerId | Exited | Surname | Geography | Gender | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 15647311 | 0 | Hill | Central | Female | 41 | 1 | 608 | 83807.86 | 112542.0 | 1 | 0 | 1.0 |
| 1 | 3 | 15619304 | 1 | Onio | West | Female | 42 | 8 | 502 | 159660.80 | 113931.0 | 3 | 1 | 0.0 |
| 2 | 4 | 15701354 | 0 | Boni | West | Female | 39 | 1 | 699 | 159660.80 | 93826.0 | 2 | 0 | 0.0 |
| 3 | 5 | 15737888 | 0 | Mitchell | Central | Female | 43 | 2 | 850 | 125510.82 | 79084.0 | 1 | 1 | 1.0 |
| 4 | 6 | 15574012 | 1 | Chu | Central | Male | 44 | 8 | 645 | 113755.78 | 149756.0 | 2 | 1 | 0.0 |
df17.rename({("RowNumber_x","RowNumber")})
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_3768/4111734053.py in <module> ----> 1 df17.rename({("RowNumber_x","RowNumber")}) C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs) 322 @wraps(func) 323 def wrapper(*args, **kwargs) -> Callable[..., Any]: --> 324 return func(*args, **kwargs) 325 326 kind = inspect.Parameter.POSITIONAL_OR_KEYWORD C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors) 5037 4 3 6 5038 """ -> 5039 return super().rename( 5040 mapper=mapper, 5041 index=index, C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in rename(self, mapper, index, columns, axis, copy, inplace, level, errors) 1154 raise KeyError(f"{missing_labels} not found in axis") 1155 -> 1156 new_index = ax._transform_index(f, level) 1157 result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) 1158 result._clear_item_cache() C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _transform_index(self, func, level) 5544 return type(self).from_tuples(items, names=self.names) 5545 else: -> 5546 items = [func(x) for x in self] 5547 return Index(items, name=self.name, tupleize_cols=False) 5548 C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in <listcomp>(.0) 5544 return type(self).from_tuples(items, names=self.names) 5545 else: -> 5546 items = [func(x) for x in self] 5547 return Index(items, name=self.name, tupleize_cols=False) 5548 TypeError: 'set' object is not callable
df17.corr()
| RowNumber_x | CustomerId | Exited | Age | Tenure | CreditScore | Balance | EstimatedSalary | NumOfProducts | HasChckng | IsActiveMember | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| RowNumber_x | 1.000000 | 0.003785 | -0.016548 | -0.000407 | -0.008031 | 0.006779 | 0.001511 | -0.006201 | 0.006675 | 0.000962 | 0.012380 |
| CustomerId | 0.003785 | 1.000000 | -0.005406 | 0.008682 | -0.015697 | 0.005952 | -0.004549 | 0.015729 | 0.017760 | -0.014503 | 0.001384 |
| Exited | -0.016548 | -0.005406 | 1.000000 | 0.280832 | -0.012380 | -0.027459 | 0.012649 | 0.012617 | -0.046604 | -0.007874 | -0.156688 |
| Age | -0.000407 | 0.008682 | 0.280832 | 1.000000 | -0.009015 | -0.004014 | -0.016728 | -0.009153 | -0.028571 | -0.013325 | 0.082298 |
| Tenure | -0.008031 | -0.015697 | -0.012380 | -0.009015 | 1.000000 | -0.002303 | 0.008880 | 0.007777 | 0.016567 | 0.023460 | -0.028862 |
| CreditScore | 0.006779 | 0.005952 | -0.027459 | -0.004014 | -0.002303 | 1.000000 | -0.002624 | -0.002456 | 0.012346 | -0.005914 | 0.025734 |
| Balance | 0.001511 | -0.004549 | 0.012649 | -0.016728 | 0.008880 | -0.002624 | 1.000000 | 0.000653 | -0.009113 | 0.012899 | -0.012080 |
| EstimatedSalary | -0.006201 | 0.015729 | 0.012617 | -0.009153 | 0.007777 | -0.002456 | 0.000653 | 1.000000 | 0.014871 | -0.010345 | -0.011696 |
| NumOfProducts | 0.006675 | 0.017760 | -0.046604 | -0.028571 | 0.016567 | 0.012346 | -0.009113 | 0.014871 | 1.000000 | 0.002458 | 0.009174 |
| HasChckng | 0.000962 | -0.014503 | -0.007874 | -0.013325 | 0.023460 | -0.005914 | 0.012899 | -0.010345 | 0.002458 | 1.000000 | -0.011606 |
| IsActiveMember | 0.012380 | 0.001384 | -0.156688 | 0.082298 | -0.028862 | 0.025734 | -0.012080 | -0.011696 | 0.009174 | -0.011606 | 1.000000 |
sns.heatmap(df17.corr())
<AxesSubplot:>
sns.jointplot(x='Balance',y='NumOfProducts',data=df17,kind='hex')
<seaborn.axisgrid.JointGrid at 0x2e1361fda30>
sns.jointplot(x='Balance',y='NumOfProducts',data=df17,kind='reg')
<seaborn.axisgrid.JointGrid at 0x2e13a939430>
sns.jointplot(x='IsActiveMember',y='Exited',data=df17,kind='reg')
<seaborn.axisgrid.JointGrid at 0x2e13ae0c580>
sns.pairplot(df17)
<seaborn.axisgrid.PairGrid at 0x2e13c8e9730>
sns.pairplot(df17,hue='Exited')
<seaborn.axisgrid.PairGrid at 0x2e1504b2640>
sns.distplot(df17['Exited'],kde=False,)
<AxesSubplot:xlabel='Exited'>
sns.distplot(df17['IsActiveMember'],kde=False)
<AxesSubplot:xlabel='IsActiveMember'>
sns.countplot('IsActiveMember',data=df17)
<AxesSubplot:xlabel='IsActiveMember', ylabel='count'>
sns.countplot('Exited',data=df17)
<AxesSubplot:xlabel='Exited', ylabel='count'>
sns.countplot('CreditScore',data=df17)
<AxesSubplot:xlabel='CreditScore', ylabel='count'>
sns.countplot('Tenure',data=df17)
<AxesSubplot:xlabel='Tenure', ylabel='count'>
sns.countplot(y='IsActiveMember',data=df17)
<AxesSubplot:xlabel='count', ylabel='IsActiveMember'>
sns.boxplot(x='Exited',y='IsActiveMember',data=df17)
<AxesSubplot:xlabel='Exited', ylabel='IsActiveMember'>
sns.boxplot(y='Age',x='IsActiveMember',data=df17)
<AxesSubplot:xlabel='IsActiveMember', ylabel='Age'>
sns.boxplot(y='Age',x='NumOfProducts',data=df17)
<AxesSubplot:xlabel='NumOfProducts', ylabel='Age'>
sns.boxplot(y='EstimatedSalary',x='NumOfProducts',data=df17)
<AxesSubplot:xlabel='NumOfProducts', ylabel='EstimatedSalary'>
sns.violinplot(y='EstimatedSalary',x='NumOfProducts',data=df17)
<AxesSubplot:xlabel='NumOfProducts', ylabel='EstimatedSalary'>